---
title: "R Notebook"
output:
  html_document:
    df_print: paged
---


# Cleaning and pre-processing

```{r, message=FALSE}
library(readtext)
library(keyATM)
library(quanteda)
library(tidyverse)
library(rworldmap)
library(RColorBrewer)
library(haven)
library(ranger)
library(vip)
library(rpart)
library(rpart.plot)
library(jtools)
library(caret)
```


## Loading
```{r}

ndc_files <- readtext("../NDC_corpus/*", 
                                 docvarsfrom = "filenames", docvarnames = "Country")


ndc_files$doc_id <- str_replace(ndc_files$doc_id , ".txt", "") 
```


## Covariate structure

```{r}
covariates <- read_dta("covariates.dta") %>% 
  select(iso, logpopn, smallislanddevelopingstatessids, logGDPpccurr, fh_ipolity2, currenthealthexpenditureofgdpshx, coalrentsofgdpnygdpcoalrtzs, tempchangesummpopn_112, airpolln_who, finalhealthscore) %>%
  rename(Country=iso) %>% 
  add_column(HES_1=NA) %>% 
  add_column(HES_2=NA) %>% 
  add_column(HES_3=NA) %>% 
  add_column(HES_4=NA)



covariates <- covariates %>%
  mutate(
    HES_1 = case_when(
      finalhealthscore > 0 ~ 1,
      TRUE                 ~ 0
    )
  )

covariates <- covariates %>%
  mutate(
    HES_2 = case_when(
      finalhealthscore > 1 ~ 1,
      TRUE                 ~ 0
    )
  )

covariates <- covariates %>%
  mutate(
    HES_3 = case_when(
      finalhealthscore > 2 ~ 1,
      TRUE                 ~ 0
    )
  )

covariates <- covariates %>%
  mutate(
    HES_4 = case_when(
      finalhealthscore > 3 ~ 1,
      TRUE                 ~ 0
    )
  )


full_set <- left_join(ndc_files, covariates, by = "Country")


```

## Corpus

```{r}
ndc_corpus <- corpus(full_set, text_field = "text") 
```


```{r}
corp_summary <- summarise(group_by(summary(ndc_corpus, n = 158),Country), total_sentences=sum(Sentences),total_words=sum(Tokens))

```



## Tokenisation
###Dictionary for country names to remove:
```{r}
dict_newsmap <- dictionary(file = "./newsmap.yml")
```


### Tokenising and cleaning
```{r}
tokens <- tokens(ndc_corpus, what = "word",
              remove_punct = TRUE,
              remove_symbols = TRUE,
              remove_numbers = TRUE,
              remove_url = TRUE,
              split_hyphens = TRUE,
              verbose = TRUE) %>%
  tokens_tolower() %>%
  tokens_select(dict_newsmap, selection = "remove", padding = FALSE, valuetype = "glob", verbose = TRUE) %>%
  tokens_select(stopwords("english"), selection = "remove", padding = FALSE, verbose = TRUE) %>%
  tokens_select(c("[\\d-]", "[[:punct:]]", "^.{1}$", 
                  "fy", "ut", "bn", "mt", "eq", "ac", "kg", "usd", "cp", 
                  "cabo", "verde", "islamic", "republic", "lao", "pdr", "dpr", "korea",
                  "page", "graph", "approx", "informal", "translation", "please", "refer",
                  "spanish", "version", "report", "deliverable", "final", "per", "cent",
                  "ha", "year", "gg", "pak", "sf", "slm", "africaõs", 
                  "eu", "member", "state", "european", "union", "federal", 
                  "moi", "notre", "anrep", "hindu", "ands", "kush", 
                  "erda", "dame", "archive", "can", "september", "november", "march"),
                selection = "remove", 
                valuetype="regex", 
                min_nchar = 2L,
                verbose = TRUE)
  

```

#Keyness

```{r}
#figure in appendix

dfm_HES_1 <- tokens %>%
  tokens_ngrams(n = 2) %>% 
  dfm(groups = "HES_1")
  

cb_col <- c("#999999", "#E69F00")

pdf("wordcloud_dfm_HES_1.pdf", width = 7, height = 7)

textplot_wordcloud(dfm_HES_1, comparison = TRUE, max_words = 200,
                   color = cb_col, labelsize = 0)
dev.off()

#keyness analysis
keyness_hes_1 <-  textstat_keyness(dfm_HES_1, target = 2)

#plotting keyness
keyplot <- textplot_keyness(keyness_hes_1, 
                            margin = 0.2, labelsize = 2, 
                            color = cb_col, 
                            n = 30) + 
#  ggtitle("0 vs 1-5") +
  theme(legend.position="none")  

ggsave(keyplot,filename="keyness_hes_1.pdf", width = 7, height = 7)

```


```{r}
#figure in the main paper

dfm_HES_2 <- tokens %>%
  tokens_ngrams(n = 2) %>% 
  dfm(groups = "HES_2")
  

cb_col <- c("#999999", "#E69F00")

pdf("wordcloud_dfm_HES_2.pdf", width = 7, height = 7)

textplot_wordcloud(dfm_HES_2, comparison = TRUE, max_words = 200,
                   color = cb_col, labelsize = 0)
dev.off()


#keyness analysis
keyness_hes_2 <-  textstat_keyness(dfm_HES_2, target = 2)

#plotting keyness
keyplot <- textplot_keyness(keyness_hes_2, 
                            margin = 0.2, labelsize = 2, 
                            color = c("red", "darkblue"), 
                            n = 40) + 
#  ggtitle("0-1 vs 2-5") +
theme(legend.position="none") 

ggsave(keyplot,filename="keyness_hes_2.pdf", width = 7, height = 7)


setEPS()
postscript("keyness.eps")

keyplot

dev.off()

```


```{r}

#figure in appendix


dfm_HES_3 <- tokens %>%
  tokens_ngrams(n = 2) %>% 
  dfm(groups = "HES_3")
  

cb_col <- c("#999999", "#E69F00")

pdf("wordcloud_dfm_HES_3.pdf", width = 7, height = 7)

textplot_wordcloud(dfm_HES_3, comparison = TRUE, max_words = 200,
                   color = cb_col, labelsize = 0)
dev.off()


#keyness analysis
keyness_hes_3 <-  textstat_keyness(dfm_HES_3, target = 2)

#plotting keyness
keyplot <- textplot_keyness(keyness_hes_3, 
                            margin = 0.2, labelsize = 2, 
                            color = c("red", "darkblue"), 
                            n = 40) + 
#  ggtitle("0-2 vs 3-5") +
theme(legend.position="none") 

ggsave(keyplot,filename="keyness_hes_3.pdf", width = 7, height = 7)

```
 


```{r}

#figure in appendix


dfm_HES_4 <- tokens %>%
  tokens_ngrams(n = 2) %>% 
  dfm(groups = "HES_4")
  

cb_col <- c("#999999", "#E69F00")

pdf("wordcloud_dfm_HES_4.pdf", width = 7, height = 7)

textplot_wordcloud(dfm_HES_4, comparison = TRUE, max_words = 200,
                   color = cb_col, labelsize = 0)
dev.off()


#keyness analysis
keyness_hes_4 <-  textstat_keyness(dfm_HES_4, target = 2)

#plotting keyness
keyplot <- textplot_keyness(keyness_hes_4, 
                            margin = 0.2, labelsize = 2, 
                            color = c("red", "darkblue"), 
                            n = 40) + 
#  ggtitle("0-3 vs 4-5") +
theme(legend.position="none") 

ggsave(keyplot,filename="keyness_hes_4.pdf", width = 7, height = 7)

```

 


# Dictionary
Creating the dictionary of health terms:

```{r}
health_dict <- dictionary(list(health = c("malaria", "dengue", "diarrhoea", "diarrhea", "diarrhoeal", "diarrheal", "infection", "disease", "diseases", "virus", "sars", "measles", "pneumonia", "epidemic","epidemics","pandemic","pandemics","epidemiology","healthcare","health","mortality", "mortalities","morbidity","nutrition", "illness", "illnesses", "ncd", "ncds", "nutrition", "malnutrition", "malnourishment", "mental_disorder","mental_disorders", "stunting", "medical", "loss_life", "loss_lives","death", "deaths","killed", "wellbeing", "well-being", "hiv", "aids", "sti", "waterborne", "vectorborne", "immunization")))
```



#Count results

## DFM creation

```{r}
ndc_dfm <- tokens %>%  
  tokens_select(min_nchar = 3) %>% 
  tokens_ngrams(n = 1:2) %>% 
  dfm()
```


## Counting the mentions
```{r}
count_detailed <- convert(dfm_select(ndc_dfm, pattern = health_dict), to = "data.frame")



count <- convert(dfm_lookup(ndc_dfm, health_dict, valuetype = "fixed"), to = "data.frame")


health_count <- count %>% rename(Country=document) %>% 
  left_join(corp_summary, by = "Country") 

eu_value <- health_count %>% filter(Country == "LVA")

EU <- as.data.frame(t(data.frame("BEL", "FRA", "DEU", "ITA", "LUX", "NLD", "DNK", "IRL", "GBR", "GRC", "ESP", "PRT", "AUT", "FIN", "SWE", "CZE", "HUN", "POL", "EST", "LTU", "CYP", "MLT", "SVK", "SVN", "BGR", "ROU", "HRV"))) %>% rename(Country=V1) %>% add_column(eu_value) %>% select(-Country.1)


health_count <- health_count %>% bind_rows(EU) %>% arrange(Country)

health_count$percent <- health_count$health/health_count$total_words*100

```



```{r}
readr::write_csv(health_count, "health_count.csv")
readr::write_csv(count_detailed, "count_detailed.csv")
```


#KWIC

```{r}
tok.hea <- kwic(tokens, health_dict, window = 25, valuetype = "fixed")

readr::write_csv(tok.hea, "health_kwic_25_fixed.csv")

```




# Topic Model

## STM topic model


```{r eval=FALSE}
stm_dfm <- convert(ndc_dfm, to = "stm",  docvars = docvars(ndc_corpus))
```


```{r eval=FALSE}
library(stm)
search <- searchK(stm_dfm$documents, stm_dfm$vocab, 
                  K = c(2:50), 
                #  prevalence = ~ factor(Country) + s(Year) + factor(eu_total),
                  data = stm_dfm$meta)

search_results <- as.data.frame(t(do.call(rbind.data.frame, search$results)))

write_csv(temp, "search_results.csv")
```


```{r eval=FALSE}
search_results <- read_csv("search_results.csv")

ggplot(search_results, aes(x=semcoh, y=exclus)) +
    geom_point(size=5, shape =1, color = "green") +
  geom_text(aes(label=K), size=2) +
   geom_smooth(method="lm", se = FALSE, color = "red", size = .3) +
  geom_vline(xintercept = mean(search_results$semcoh), size = .2, linetype="dashed") +
    geom_hline(yintercept = mean(search_results$exclus), size = .2, linetype="dashed") +
  theme_bw() +
 # ggtitle("Selecting optimal number of topics") + 
  xlab("Semantic coherence") + ylab("Exclusivity")

ggsave("optimal_topics.pdf")


ggplot(search_results, aes(x=K, y=heldout)) + 
  geom_line(size = 1.5, alpha = 0.7, show.legend = FALSE) +
  labs(x = "K (number of topics)",
       y = "Held-out likelihood",
       title = "Model diagnostics by number of topics")

ggplot(search_results, aes(x=K, y=residual)) + 
  geom_point(size = 1.5, alpha = 0.7, show.legend = FALSE) +
  labs(x = "K (number of topics)",
       y = "Residuals",
       title = "Model diagnostics by number of topics")
```


## Preparing data




```{r}
keywords <- list( health = c("malaria", "dengue", "diarrhoea", "diarrhea", "diarrhoeal", "diarrheal", "infection", "disease", "diseases", "virus", "sars", "measles", "pneumonia", "epidemic","epidemics","pandemic","pandemics","epidemiology","healthcare","health","mortality", "mortalities","morbidity","nutrition", "illness", "illnesses", "ncd", "ncds", "nutrition", "malnutrition", "malnourishment", "mental_disorder","mental_disorders", "stunting", "medical", "loss_life", "loss_lives","death", "deaths","killed", "wellbeing", "well-being", "hiv", "aids", "sti", "waterborne", "vectorborne", "immunization"), 
                  economy = c("extensive_development", "economy", "gdp", "economic_growth", "economic_development", "developing", "economy", "economic", "imf", "economists", "fuel", "green_growth", "low_carbon", "cost", "market", "forecast", "income", "wealth", "fair", "fairness", "developed", "growth_model", "resources", "needs", "economic_progress", "prices", "macroeconomic", "urbanization", "economically", "industry", "industries", "economic_damage", "economies", "invest", "investment", "investments", "fdi", "employment", "unemployment", "unemployed"),
                  energy = c("energy", "security", "energy_access", "demand", "carbon", "technology", "technologies", "power_plants", "coal_based", "got", "clean_energy", "gw", "solar", "electricity_generation", "electrification", "clean_coal", "thermal", "grid", "green_energy", "efficiency", "power", "energy_efficiency", "energy_efficient", "market", "fuel", "energy_conversation", "energy_saving", "energy_industries"),
                  landagriculture = c("agriculture", "agricultural", "nadf", "ecologists", "food", "environment", "arid", "drought", "soil", "livestock", "agricultural_investment", "food_security", "land", "land_management", "ecology", "ecologists", "crops", "crop", "resistant", "productivity", "ecosystems", "plants", "livelihood", "livelihoods", "grains", "food_supplies", "sustainable_agriculture", "genotypes", "water", "biodiversity", "genetics", "climate_resilient", "resource", "natural_resource", "resource_management", "soil_testing", "agroforestry", "forestry", "deforestation", "rural", "farming", "cropping", "forest", "tree", "land_use"))
```


```{r}
# Read texts into keyATM
ATM_docs <- keyATM_read(ndc_dfm)
```

```{r}
visualize_keywords(ATM_docs, keywords[4]) 

ggsave("keywords_visualisation_four_agri.pdf")
```



## KeyATM topic model

```{r}
output <- keyATM(docs            = ATM_docs,    
                 no_keyword_topics = 2,       
                 keywords          = keywords, 
                 model             = "base",  
                 options           = list(seed = 123))

top_words(output, n = 50, measure = "lift", show_keyword = TRUE) %>% write_csv("top_words_four_2.csv")
```



```{r}
plot_modelfit(output)
```



```{r}
topic_proportions_6 <- data.frame(output$theta)

topics <- cbind(full_set$Country, topic_proportions_6) %>% rename(Country = `full_set$Country`)

eu_value_topic <- topics  %>% filter(Country == "LVA")

EU_topics <- as.data.frame(t(data.frame("BEL", "FRA", "DEU", "ITA", "LUX", "NLD", "DNK", "IRL", "GBR", "GRC", "ESP", "PRT", "AUT", "FIN", "SWE", "CZE", "HUN", "POL", "EST", "LTU", "CYP", "MLT", "SVK", "SVN", "BGR", "ROU", "HRV"))) %>% rename(Country=V1) %>% add_column(eu_value_topic) %>% select(-Country.1)


topics <- topics %>% bind_rows(EU_topics) %>% arrange(Country)

write_csv(topics, "topics.csv")
```



```{r}
topics <- read_csv("topics.csv")


# The palette with grey:
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73")

# The palette with black:
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")



topics %>% 
  select(Country, starts_with("X")) %>% 
  reshape2::melt() %>%
  ggplot(aes(x=value, fill=variable)) + 
  geom_density(alpha=0.3, position="identity") +
  theme_bw() +
  xlab("Topic proportion") + ylab("kernel density") +
 scale_fill_manual(name="Topics",values=cbPalette,
                   labels=c("Health","Economy", "Energy", "Agriculture")) +
#  ggtitle("Topic proportions") + 
  ggsave("topic_proportions_density.pdf")

```




```{r}
#figure in the paper

setEPS()
postscript("topics_freqpoly.eps")

topics %>% 
  select(Country, starts_with("X")) %>% 
  reshape2::melt() %>%
  ggplot(aes(x=value, colour=variable, linetype = variable)) + 
  geom_freqpoly() +
  theme_bw() +
  xlab("Topic proportion") + ylab("Number of NDCs") +
 scale_colour_manual(name="Topics",values=cbPalette,
                   labels=c("Health","Economy", "Energy", "Agriculture")) +
scale_linetype_manual(name="Topics", values=c("twodash", "dotted", "solid", "dashed"), 
                      labels=c("Health","Economy", "Energy", "Agriculture"))+
 # guides(linetype=FALSE) + 
#  ggtitle("Topic proportions") + 
  ggsave("topic_proportions_freqpoly.pdf")

dev.off()

```


#########################
# Statistical analysis
#########################

## data preparation

```{r}
model_data <- left_join(health_count, covariates, by = "Country") %>% left_join(topics, by = "Country")

model_data <- model_data %>% drop_na() %>% rename(health_count=health, 
                                                  health_percent=percent, 
                                                  population = logpopn, 
                                                  sids = smallislanddevelopingstatessids,
                                                  income = logGDPpccurr, 
                                                  democracy = fh_ipolity2, 
                                                  health_expenditure = currenthealthexpenditureofgdpshx, 
                                                  coal_rents = coalrentsofgdpnygdpcoalrtzs, 
                                                  temperature_change = tempchangesummpopn_112, 
                                                  air_pollution = airpolln_who, 
                                                  HES = finalhealthscore, 
                                                  health_topic = X1_health, 
                                                  economy_topic = X2_economy,
                                                  energy_topic = X3_energy,
                                                  agri_topic = X4_landagriculture)

model_data$sids <- factor(model_data$sids)


EU <- c("BEL", "FRA", "DEU", "ITA", "LUX", "NLD", "DNK", "IRL", "GBR", "GRC", "ESP", "PRT", "AUT", "FIN", "SWE", "CZE", "HUN", "POL", "EST", "LTU","LVA", "CYP", "MLT", "SVK", "SVN", "BGR", "ROU", "HRV")

model_data <- model_data %>% mutate(eu = as.factor(Country %in% EU)) 

model_data$eu <- recode(model_data$eu, `TRUE` = "EU", `FALSE` = "Non-EU")

write_csv(model_data, "model_data.csv")

```



```{r}
library(stargazer)

stargazer(model_data, omit.summary.stat = c("p25", "p75"), type = "text", out = "summary_stats.txt")

```



## Linear model


### core regression analysis




```{r}
reg_model_health_topic <- lm(formula = health_topic ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_health_topic,
     model.names = "Health Topic",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "health_topic_regression.docx")

```



```{r}
reg_model_economy_topic <- lm(formula = economy_topic ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_economy_topic,
     model.names = "Economy Topic",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "economy_topic_regression.docx")

```



```{r}
reg_model_energy_topic <- lm(formula = energy_topic ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution ,
    data = model_data)

export_summs(reg_model_energy_topic,
     model.names = "Energy Topic",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "energy_topic_regression.docx")

```


```{r}
reg_model_agri_topic <- lm(formula = agri_topic ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_agri_topic,
     model.names = "Agriculture Topic",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "agri_topic_regression.docx")

```


```{r}
reg_model_HES <- lm(formula = HES ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_HES,
     model.names = "HES measure",
     coefs = c("Population" = "population", 
               "SIDS" = "sids",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "hes_regression.docx")

```



```{r}
reg_model_HES <- lm(formula = HES ~ total_words + population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_HES,
     model.names = "HES measure",
     coefs = c("Total words" = "total_words", "Population" = "population", 
               "SIDS" = "sids",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "hes_regression_total_words.docx")

```


```{r}
reg_model_percent <- lm(formula = health_percent ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    data = model_data)

export_summs(reg_model_percent,
     model.names = "Percent measure",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", R2 = "r.squared", AdjR2 = "adj.r.squared", AIC = "AIC"),
 to.file = "docx", file.name = "percent_regression.docx")

```



```{r}
reg_model_poisson <- glm(formula = health_count ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution,
    family = "poisson",
    data = model_data)

export_summs(reg_model_poisson,
     model.names = "Count measure",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
     robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", pseudoR2 = "pseudo.r.squared", 
                    McFaddenR2 = "pseudo.r.squared.mcfadden", AIC = "AIC"),
 to.file = "docx", file.name = "count_regression.docx")

```



```{r}
plot_summs(reg_model_health_topic, reg_model_economy_topic, reg_model_energy_topic, reg_model_agri_topic,
           inner_ci_level = .9, 
           robust = "HC1", 
#           scale = TRUE,
#           transform.response = TRUE,
           coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
           model.names = c("Health", "Economy", "Energy", "Agriculture"))
ggsave("effects.pdf")
```




```{r}

HES_logistic <- glm(formula = HES_binary ~ population + sids + income +
    democracy + health_expenditure + coal_rents + 
    temperature_change + air_pollution, 
    family="binomial", 
    data = model_data)

```


```{r}
export_summs(HES_logistic,
     model.names = "logistic regression",
     coefs = c("Population" = "population", 
               "SIDS" = "sids1",
               "GDP per capita" = "income",
               "Democracy" = "democracy",
               "Health Expenditure" = "health_expenditure",
               "Coal Rents" = "coal_rents", 
               "Temperature Change" = "temperature_change",
               "Air Pollution" = "air_pollution"),
 #    robust = "HC1", 
     confint = TRUE, 
     digits = 3,
     error_pos = "right",
     error_format = "({conf.low}, {conf.high}, p = {p.value})",
     statistics = c(N = "nobs", AIC = "AIC", 
                    pseudoR2 = "pseudo.r.squared", McFaddenR2 = "pseudo.r.squared.mcfadden"),
 to.file = "docx", file.name = "logistic_regression.docx")

```






### heteroskedasticity tests

#### Health topic


```{r}
lmtest::bptest(reg_model_health_topic) #rejection of the null hypothesis of homoskedasticity

olsrr::ols_test_breusch_pagan(reg_model_health_topic, rhs = TRUE, multiple = TRUE, p.adj = "bonferroni")

olsrr::ols_test_score(reg_model_health_topic)

olsrr::ols_test_f(reg_model_health_topic)
```


#### HES

```{r}
lmtest::bptest(reg_model_HES) #rejection of the null hypothesis of homoskedasticity

olsrr::ols_test_breusch_pagan(reg_model_HES, rhs = TRUE, multiple = TRUE, p.adj = "bonferroni")

olsrr::ols_test_score(reg_model_HES)

olsrr::ols_test_f(reg_model_HES)

```






#################################
#Maps
#################################

## topics

```{r}


map <- joinCountryData2Map(topics, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap_health_topic.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="X1_health", 
                            mapTitle="Health Topic Proportion", 
                            catMethod = "fixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 3,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```



```{r}


map <- joinCountryData2Map(topics, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap_econ_topic.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="X2_economy", 
                            mapTitle="Economy Topic Proportion", 
                            catMethod = "fixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 3,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```




```{r}


map <- joinCountryData2Map(topics, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap_energy_topic.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="X3_energy", 
                            mapTitle="Energy Topic Proportion", 
                            catMethod = "fixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 3,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```


```{r}


map <- joinCountryData2Map(topics, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap_agri_topic.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="X4_landagriculture", 
                            mapTitle="Agriculture Topic Proportion", 
                            catMethod = "fixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 3,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```

## count

```{r}


map <- joinCountryData2Map(health_count, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="health", 
                            mapTitle="Health Count", 
                            catMethod = "logFixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2,
                          horizontal = FALSE,
                          digits = 0))
dev.off()
```


## percent

```{r}

map <- joinCountryData2Map(health_count, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")

pdf("worldmap2.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="percent", 
                            mapTitle="Health Mentions Percent of Statement", 
                            catMethod = "logFixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

do.call( addMapLegend, c( mapParams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2.5,
                          horizontal = FALSE,
                          digits = 2))

dev.off()
```

##HES

```{r}


map <- joinCountryData2Map(model_data, joinCode="ISO3", nameJoinColumn="Country")

new_world <- subset(map, continent != "Antarctica")


pdf("worldmap_hes.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")

mapParams <- mapCountryData(new_world, nameColumnToPlot="HES", 
                            mapTitle="HES Measure", 
                            numCats = 5,
                            catMethod = "logFixedWidth", 
                            colourPalette = "heat", 
                            oceanCol = "lightblue", 
                            missingCountryCol = "white", 
                            addLegend="FALSE")

#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( mapParams, legendLabels="limits", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2,
                          horizontal = FALSE,
                          digits = 0))
dev.off()
```

##GBD maps

```{r}

pdf("GBDmap_topic.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")


GBMparams <- mapByRegion(topics, 
             nameDataColumn="X1_health", 
             joinCode="ISO3", 
             nameJoinColumn="Country", 
             regionType="GBD", 
             FUN="mean", 
             mapTitle="Health Topic Proportion by GBM Region", 
             addLegend="FALSE",
             catMethod = "fixedWidth", 
            colourPalette = "heat", 
            oceanCol = "lightblue",
            missingCountryCol = "white")


#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( GBMparams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 3,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```



```{r}

pdf("GBDmap.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")


GBMparams <- mapByRegion(health_count, 
             nameDataColumn="health", 
             joinCode="ISO3", 
             nameJoinColumn="Country", 
             regionType="GBD", 
             FUN="mean", 
             mapTitle="Health Count by GBM Region", 
             addLegend="FALSE",
             catMethod = "logFixedWidth", 
            colourPalette = "heat", 
            oceanCol = "lightblue",
            missingCountryCol = "white")


#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( GBMparams, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2,
                          horizontal = FALSE,
                          digits = 0))
dev.off()
```






```{r}

pdf("GBDmap2.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")


GBMparams2 <- mapByRegion(health_count, 
             nameDataColumn="percent", 
             joinCode="ISO3", 
             mapTitle="Health Mentions Percent of Statement by GBM Region", 
             nameJoinColumn="Country", 
             regionType="GBD", 
             FUN="mean", 
             addLegend="FALSE",
             catMethod = "logFixedWidth", 
            colourPalette = "heat", 
            oceanCol = "lightblue",
            missingCountryCol = "white")


#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( GBMparams2, legendLabels="all", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2.5,
                          horizontal = FALSE,
                          digits = 2))
dev.off()
```


```{r}

pdf("GBDmap_hes.pdf", width = 7, height = 3)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")


GBMparams <- mapByRegion(model_data, 
             nameDataColumn="HES", 
             joinCode="ISO3", 
             nameJoinColumn="Country", 
             regionType="GBD", 
             FUN="mean", 
             mapTitle="HES Measure by GBM Region", 
             addLegend="FALSE",
             numCats = 3,
             catMethod = "logFixedWidth", 
            colourPalette = "heat", 
            oceanCol = "lightblue",
            missingCountryCol = "white")


#do.call( addMapLegend, c(mapParams,title="Number of mentions",x = "bottomleft", horiz=FALSE, cex=0.5))
do.call( addMapLegend, c( GBMparams, legendLabels="limits", 
                          legendWidth=0.5,
                          legendShrink = 0.3,
                          tcl = -0.3,
                          labelFontSize = 0.3, 
                          legendMar = 2,
                          horizontal = FALSE,
                          digits = 0))
dev.off()
```









